import time
import faiss
import numpy as np
from encode.text_encoder import TextToVec
# from to_json.load_json import read_json
from to_json import load_json

# Module-level text encoder shared by the script below. It is constructed at
# import time (outside the __main__ guard) from a model path that is relative,
# so it presumably resolves against the current working directory -- verify
# against TextToVec.
text2vec = TextToVec(
    '../model/bge-large-zh-v1_5_/bge-large-zh-v1_5_'
)

if __name__ == "__main__":
    # Script entry point: load (id, text) pairs from JSON, embed every text,
    # store the vectors in a flat L2 FAISS index addressed by the caller's own
    # ids, and write the index to disk.

    # Dimensionality the flat index is created with; the encoder output must
    # match it (checked below before anything is added).
    EMBEDDING_DIM = 1024

    print('读取json')
    # Alternative input previously used: '../data/id_content_dict.json'
    all_id_list, all_paper_list = load_json.read_json('../data/R5_id_content_dict.json')

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if len(all_id_list) != len(all_paper_list):
        raise ValueError(
            f"id/content length mismatch: {len(all_id_list)} ids vs "
            f"{len(all_paper_list)} texts"
        )
    print('数据量：', len(all_id_list))

    index = faiss.IndexFlatL2(EMBEDDING_DIM)
    # Wrap the flat index so that vectors are addressed by the database's
    # law_id instead of by their insertion position.
    index2 = faiss.IndexIDMap2(index)

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()
    paper_embedding = text2vec.list_text2vec(all_paper_list)
    end_time = time.perf_counter()
    print('条文转向量用时', end_time - start_time)

    # FAISS works on C-contiguous float32 vectors and int64 ids. Convert both
    # explicitly rather than relying on the wrapper to coerce a Python list or
    # a float64 array (older wrappers read `ids.shape` directly and fail).
    paper_array = np.ascontiguousarray(paper_embedding, dtype=np.float32)
    id_array = np.asarray(all_id_list, dtype=np.int64)

    if paper_array.ndim != 2 or paper_array.shape[1] != EMBEDDING_DIM:
        raise ValueError(
            f"expected embeddings of shape (n, {EMBEDDING_DIM}), "
            f"got {paper_array.shape}"
        )
    if len(paper_array) != len(id_array):
        raise ValueError(
            f"embedding/id count mismatch: {len(paper_array)} vectors vs "
            f"{len(id_array)} ids"
        )

    # Register every vector under its own id.
    index2.add_with_ids(paper_array, id_array)

    # Persist the index to disk.
    start_time = time.perf_counter()
    faiss.write_index(index2, "../vector/R5_IPO_map2_id_content_dict_bge_large_zh_v1_5.faiss")
    end_time = time.perf_counter()
    print('写入用时', end_time - start_time)




